/*==============================================================================
Merge and modify codes for time series of US initial conditions

==============================================================================*/

set more off

* ==============================================================================
* Bring in 1970 Census
* ==============================================================================

* Load the 1970 census extract, replacing whatever is in memory.
use "$dta_files/IC_US_CENS1970", clear

* age_total is removed first so that the age* wildcard below does not pick it up.
drop age_total

* Expand the wildcard list once, then prefix every matched variable with ts_.
unab census_vars : age* edatt* inmigration* outmigration* migration_yrs ///
                   home_* hh_size
foreach v of local census_vars {
	rename `v' ts_`v'
}

* Only the identifiers and the freshly prefixed series are carried forward.
keep nuts year ts*

* Park the result in a tempfile; it is merged back in after the nuts codes
* have been converted to labels.
* NOTE(review): the tempfile macro is dereferenced with ".dta" inside the
* macro quotes, here and where it is read back; left unchanged - confirm this
* form resolves to the declared tempfile on the Stata version in use.
tempfile 1970census
save `1970census.dta'

* ==============================================================================
* Get EMP female time series 
* ==============================================================================

* Start from the EMP* columns of the 1970 census extract and merge in the two
* labor files on nuts-year. The second merge only fills missing values
* (update) and creates no _merge variable (nogen).
use nuts year EMP* using "$dta_files/IC_US_CENS1970", clear
merge 1:1 nuts year using "$dta_files/US_CENS1970_labor"
merge 1:1 nuts year using "$dta_files/US_labor", update nogen

* Total employment: prefer EMP_US, fall back to EMP_CENS1970 where it is missing.
gen EMP = . 
replace EMP = EMP_US 
replace EMP = EMP_CENS1970 if EMP==.

* Female employment: prefer ts_EMP_female, fall back to the 1970 census value.
* The fallback is conditioned on EMP_female itself being missing. The previous
* condition tested EMP, which has already been backfilled above, so the
* census value was not used on rows where total employment was available.
rename EMP_female EMP_female_CENS1970
gen EMP_female=.
replace EMP_female = ts_EMP_female 
replace EMP_female = EMP_female_CENS1970 if EMP_female==.

keep nuts year EMP EMP_female

* Apply the ts_ prefix used by the rest of the file.
rename EMP_female ts_EMP_female
rename EMP ts_EMP

tempfile ts_female_emp
save `ts_female_emp'

* ==============================================================================
* Prepare data for merge in initial conditions for age, edatt
* migration & hh_size already collapsed into WVS-regional level 
* ==============================================================================

* Census/ACS series: every year other than 2000 is kept; for 2000 only the
* rows with acs==0 are kept.
use "$dta_files/TS_US_CENS.dta" if year!=2000 | (year==2000 & acs==0), clear 
tempfile census_acs
save `census_acs.dta'

* State-level BEA series.
use "$dta_files/TS_US_BEA.dta", clear

drop if statefip==43 //drop Puerto Rico

* Assign each statefip code to one of ten region codes (840001-840010).
* Codes not listed in any group keep region missing. The label noted beside
* each group is the one this file later writes into nuts for that code.
gen region = .
replace region = 840001 if inlist(statefip, 9, 23, 25, 33, 44, 50)                  // New England
replace region = 840002 if inlist(statefip, 34, 36, 42)                             // Middle Atlantic States
replace region = 840003 if inlist(statefip, 10, 11, 12, 13, 14, 24, 37, 45, 51, 54) // South Atlantic
replace region = 840004 if inlist(statefip, 1, 21, 28, 47)                          // East South Central
replace region = 840005 if inlist(statefip, 5, 22, 40, 48)                          // West South Central
replace region = 840006 if inlist(statefip, 17, 18, 26, 39, 55)                     // East North Central
replace region = 840007 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46)             // West North Central
replace region = 840008 if inlist(statefip, 4, 8, 16, 30, 32, 35, 49, 56)           // Rocky Mountain state
replace region = 840009 if inlist(statefip, 2, 15, 41, 53)                          // Northwest
replace region = 840010 if inlist(statefip, 6)                                      // California

* Keep the state-level rows so they can be appended after the regional collapse.
tempfile US_states
save `US_states.dta'

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* By Region
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Sum every ts* series within region-year and flag the aggregate rows.
collapse (sum) ts*, by(region year)
gen census_region =1 

* Put the state-level rows back underneath the regional aggregates.
append using `US_states.dta'

* nuts is the merge key: the statefip code on state rows, the region code on
* the aggregate rows (which have no statefip after the collapse).
gen nuts=statefip
replace nuts= region if statefip==.


* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* merge in initial conditions for age, school enrollment migration & hh_size 
* already collapsed into WVS-regional level 
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* update: values from the using file only fill in missing master values.
merge 1:1 nuts year using `census_acs.dta', update 
drop _merge

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* merge in migration data from the CPS  
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* nogen means this merge creates no _merge variable, so an unconditional
* "drop _merge" would stop the do-file with "variable _merge not found".
* capture keeps the cleanup (in case the CPS file itself carries a _merge
* column) without failing when there is nothing to drop.
merge 1:1  nuts year using "$dta_files/TS_US_CPS.dta" ,update nogen
capture drop _merge

********************************************************************************

* Convert the numeric nuts key to a string so it can hold text labels.
tostring nuts, replace force

* State-level rows: each "code=XX" pair turns the stringified code into the
* label "US: XX".
local state_codes ///
	2=AK 1=AL 5=AR 60=AS 4=AZ 6=CA 8=CO 9=CT 11=DC 10=DE 12=FL ///
	13=GA 66=GU 15=HI 19=IA 16=ID 17=IL 18=IN 20=KS 21=KY 22=LA 25=MA ///
	24=MD 23=ME 26=MI 27=MN 29=MO 28=MS 30=MT 37=NC 38=ND 31=NE 33=NH ///
	34=NJ 35=NM 32=NV 36=NY 39=OH 40=OK 41=OR 42=PA 72=PR 44=RI 45=SC ///
	46=SD 47=TN 48=TX 49=UT 51=VA 78=VI 50=VT 53=WA 55=WI 54=WV 56=WY

foreach pair of local state_codes {
	* split "code=XX" into the code, the "=" separator and the abbreviation
	gettoken fips abbr : pair, parse("=")
	gettoken eq abbr : abbr, parse("=")
	replace nuts = "US: `abbr'" if nuts=="`fips'"
}

* National rows.
replace nuts = "Entire U.S." if region==0 | nuts=="0"

* Regional rows: labelled either because the row is a collapsed regional
* aggregate (region code set and census_region==1) or because its nuts key is
* the region code itself.
local region_name1  "New England"
local region_name2  "Middle Atlantic States"
local region_name3  "South Atlantic"
local region_name4  "East South Central"
local region_name5  "West South Central"
local region_name6  "East North Central"
local region_name7  "West North Central"
local region_name8  "Rocky Mountain state"
local region_name9  "Northwest"
local region_name10 "California"

forvalues r = 1/10 {
	local code = 840000 + `r'
	replace nuts = "US: `region_name`r''" if (region==`code' & census_region==1) | nuts=="`code'"
}

********************************************************************************

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Merge in 1970 Census
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Fill missing values from the 1970 census tempfile; nogenerate replaces the
* separate "drop _merge" step.
merge 1:1 nuts year using `1970census.dta', update nogenerate

* National rows are rebuilt below, so discard any that are already present.
drop if nuts=="Entire U.S."

* State + region panel, appended back under the national rows at the end.
tempfile append
save `append.dta'

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Entire U.S.
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Same data again: starting point for the two national collapses.
tempfile precollapse
save `precollapse.dta'

* --- National household size: weighted mean over the non-aggregate rows ---
drop if census_region==1 //don't want to double count data from census regions

* Weight is the row sum of the age-group variables from ts_age_0_4 through
* ts_age_70_plus.
egen weight = rsum(ts_age_0_4 - ts_age_70_plus)

collapse (mean) ts_hh_size [aw=weight], by(year)
gen region=0 

tempfile entire_us_merge
save `entire_us_merge.dta'

* --- National totals: plain sums over the rows that have a statefip ---
use `precollapse.dta', clear
drop ts_hh_size *migrat*
drop if statefip==. //don't want to double count data from census regions

collapse (sum) ts* POP , by(year)
gen region=0 

* Attach the weighted household size to the national totals.
merge 1:1 region year using  `entire_us_merge.dta', update nogenerate
gen nuts="Entire U.S."

* Stack the state/region panel underneath, then fill in the employment series.
append using `append.dta'

merge 1:1 nuts year using `ts_female_emp', update nogenerate


* ==============================================================================
* Generate variables 
* ==============================================================================

* Row sum across every ts_age* variable.
egen ts_age_total = rsum(ts_age*)

* Female employment as a percentage of total employment.
gen ts_EMP_share_female = ts_EMP_female / ts_EMP *100

* Zeros in any ts* series (including the two variables just created) are
* recoded to missing.
foreach series of varlist ts* {
	replace `series' = . if `series'==0
}

* Mean years of education: sum over k = 0..22 of (count with k years / total) * k.
* The expression is assembled term by term and evaluated in a single gen,
* so it is missing whenever any one of the 23 counts or the total is missing.
local mean_yrs_expr "ts_edatt_yrs_0/ts_edatt_total*0"
forvalues k = 1/22 {
	local mean_yrs_expr "`mean_yrs_expr' + ts_edatt_yrs_`k'/ts_edatt_total*`k'"
}
gen ts_edatt_mean_yrs = `mean_yrs_expr'

* Migration rates in percent: the flow is divided by migration_yrs^0.7 and
* then by the population measure (ts_age_total for the census series, POP for
* the CPS series). Two loops keep the original variable order.
foreach dir in in out {
	gen ts_`dir'migration_rate = (ts_`dir'migration/(ts_migration_yrs)^(.7)) / ts_age_total *100
}
foreach dir in in out {
	gen ts_`dir'migration_rate_cps = (ts_`dir'migration_cps/(ts_migration_yrs_cps)^(.7)) / POP *100
}

*********************************************

* Drop the years 1968, 1969 and 2012 and any row without a nuts label.
drop if inlist(year, 1968, 1969, 2012) | nuts==""

saveold "$dta_files/step0503_TS_US_merged_processed.dta", replace
